################################################################## 
# R code: Exercise 2.10(b)
# File: CSETAR_estimation.r
#
# Set of R functions for estimating CSETAR models, programmed by
# K.S. Chan.
#
# Reference:
# Chan, K.S. and Tsay, R.S. (1998).
#   Limiting properties of the least squares estimator of a  
#   continuous threshold autoregressive model.
#   Biometrika, 85(2), 413-426.
#   DOI: 10.1093/biomet/85.2.413. 
##################################################################
 "create.x" <- function(x,start,p,d){
# Create the matrix [x(t-1),..x(t-p),x(t-d)]; 
# no x(t-d) between x(t-1) and x(t-p).
# This function is called by main 
        n  <- length(x)
        xy <- NULL
        for(i in (1:p))
           xy <- cbind(xy, x[(start - i):(n - i)])
           xy <- xy[,  - d]
           cbind(xy, x[(start - d):(n - d)])
}
"derivative" <- function(x, r, is.constant.jump = F){
        m <- dim(x)[2]
        z <- x[, m]
        res <- cbind(x[,- m], pmin(z-r, 0),pmax(z-r,0))
        if(is.constant.jump) {
          I   <- z <= r
          res <- cbind(x[,c(-1,- m)],I,I * z,(1 - I),(1 - I)*z)
        }
        res
}
"formh" <- function(x,theta,resi,covariate = NULL){
#       This function is a "work horse" of main
        theta1     <- rev(theta)
        r          <- theta1[1]
        phid.minus <- theta1[3]
        phid.plus  <- theta1[2]
        m <- dim(x)[2]
        z <- x[, m]
        I <- z <= r
        work <- cbind(1,x[,- m],pmin(z - r,0),pmax(z - r, 0),-(
                phid.minus * I+phid.plus*(1 - I)),covariate)
                apply(work, 2, mult, resi)
}
"is.missing" <- function(x){
        any(x == "NA")
}
"mult" <- function(a, b){
        a * b
}
"main" <- function(y,p,d,start=p+1,a=0.25,b=0.75,Print=T,series.name= 
        "", is.grid.search=T,npts.grid=100,standard=F,covariate=NULL,
        covariate.name=NULL){
# 
#  This function fits a continuous TAR model with 
#  delay d and order p in both regimes. 
#  It uses the method of grid search to minimize the 
#  sum of squared one-step ahead error. 
#  INPUT:
#  y     = time series
#  start = starting value
#  p     = order
#  d     = delay
#  a     = beginning fraction of obs over which the search is done 
#          (default=.25)
#  b     = ending " " (default=.75)
#  npts.grid = no of grid points (default=100) 
#  standard  = logical variable of whether or not to standardize the data
#              (default=F)
#  covariate = matrix of covariates. (default=none)
#  covariate.name = labels for the covariates 
#            
#  OUTPUT:
#  thd  = estimated thd
#  ls   = the ls object of the regression fit with the threshold
#         fixed at thd
#  coef = estimated coefficients=(phi0,phi1,...,phip,phid-,phid+,r)
#  cov  = var-cov matrix for coef
#  sigma1 = noise std dev in the lower regime
#  sigma2 = noise std dev in the upper regime
   AIC.linear <- NA
   lab <- c("constant", paste("lag", 1:p, sep = ""))
   lab <- lab[ -(d + 1)]
   lab <- c(lab,paste(paste("lag",d,sep=""),c("-","+"),sep=""), 
          "threshold", covariate.name)
   y1  <- y[y != "NA"]
   if(standard)
      y <- (y - mean(y1))/var(y1)^0.5
      n <- length(y1)
      old.cand <- sort(y1[(start:n)-d])
      n.cand <- length(old.cand)
      lbound <- sum(old.cand == min(old.cand))
      ubound <- sum(old.cand == max(old.cand))
      s <- (a1<-max((2*p+1),lbound+p+1,round(n.cand*a))):(b1<- 
                min(n.cand -(2*p+1),n.cand-ubound-(p+1),round(
                n.cand*b)))
      cand <- old.cand[s]
  if(is.grid.search)
    cand  <- old.cand[a1]+((old.cand[b1]-old.cand[a1])*(0:
                     npts.grid))/npts.grid
    c1    <- cand[1]
    frac1 <- floor(sum(old.cand <= c1)/n.cand*100)
    c2    <- cand[cand[length(cand)]]
    frac2 <- ceiling(sum(old.cand <= c2)/n.cand*100)
    mse   <- 0*cand
    x     <- create.x(y,start,p,d)
    y2    <- y[ -(1:p)]
 if(is.matrix(covariate))
 covariate <- covariate[ -(1:p),]
 else covariate <- covariate[ -(1:p)]
 i <- 1
 for(r in cand) {
     xwork  <- cbind(derivative(x,r),covariate)
     select <- !apply(cbind(xwork, y2),1,is.missing)
     mse[i] <- ls.diag(lsfit(xwork[select,],y2[select]))$std.dev
     i <- i + 1
 }
 thd    <- cand[sortindex <- sort.list(mse)[1]]
 xwork  <- cbind(derivative(x, thd), covariate)
 select <- !apply(cbind(xwork, y2), 1, is.missing)
 ls     <- lsfit(xwork[select,  ], y2[select])
 theta  <- c(ls$coef, thd)
 n      <- sum(select)
 n1     <- n - p
 if(is.matrix(covariate))
   covariate1 <- covariate[select,  ]
 else covariate1 <- covariate[select]
   V <- formh(x[select,  ,drop=F],theta,ls$residuals,covariate= 
              covariate1)
   V <- t(V) %*% V/n
   U <- formh(x[select,  ,drop=F],theta,1,covariate=covariate1)
   U <- t(U) %*% U/n
   Uinv <- solve(U)
   cov  <- Uinv %*% V %*% Uinv/n
   coef <- c(ls$coef, thd)
   z    <- x[select, dim(x)[2]]
   I    <- z <= thd
   sigma1  <- sum(ls$residuals^2 * I)/(sum(I))
   sigma1  <- sigma1^0.5
   sigma2  <- sum(ls$residuals^2 * (1 - I))/sum(1 - I)
   sigma2  <- sigma2^0.5
   res     <- ls$residuals
   std.res <- ((res * I)/sigma1) + ((res * (1 - I))/sigma2)
   fit     <- y2[select] - res
   std.err <- diag(cov)^0.5
   coef    <- c(ls$coef, thd)
   names(coef)    <- lab
   dimnames(cov)  <- list(lab, lab)
   names(std.err) <- lab
   coef.stder     <- signif(rbind(coef, std.err), 4)
   dimnames(coef.stder) <- list(c("coef", "std. err."), lab)
   if(Print) {
     n <- length(y1)
if(series.name != "")
 cat(" \n\n The time series being analyzed is ",series.name,"\n")
 cat(" CLS estimation of \n continuous TAR model with p =\n", 
       round(p, 3), " d = ", round(d, 3), "\n")
 cat("\n threshold searched from the ", signif(a1/n * 100,3), 
     " percentile  to the ", signif(b1/n * 100, 3), 
     " percentile\n")
if(is.grid.search)
 cat(" using a grid search of ", npts.grid + 1, 
     " points evenly distributed\n over the range (", 
       signif(range(cand), 4), ")\n")
 cat("\n The fitted model is: \n")
 print(coef.stder)
 cat("\n")
 cat(" RMS for 1st (<= threshold) \n and 2nd regime resp. are = ",
 signif(sigma1^2, 4), ",", signif(sigma2^2, 4), "\n")
 cat("\n")
 cat(" no. of observations in 1st regime (<= threshold)", 
     " and 2nd regime\n resp. are  = ", sum(I), 
     ",", sum(1 - I), "\n")
 cat(" The threshold estimate is approximately the ",signif(sum(
       old.cand <= thd)/n.cand * 100, 3), 
     " percentile of all threshold data\n")
 cat("\n (In the case that the threshold estimate occurs at a tie,",
      "\n the above percentile may appear to lie outside ",
      "\n the search range for the threshold.)\n\n")
        }
invisible(list(thd=thd,ls=ls,cand=cand,mse=mse,cov=cov, 
          coef=coef,sigma1=sigma1,sigma2=sigma2,std.err= 
          std.err,std.res=std.res,n1=sum(I),n2=sum(1 - I),lab
          = list(c("coef","std.err."),lab),fit=fit))
}
